********************************************************************************
********************************************************************************
** Robustness analysis 1
** Potential endogeneity of private school learning premium.
** Using "failing the PSLE" as instrument for private school enrolment
********************************************************************************
********************************************************************************

** Load data
* Reads the FTNA analysis file from the folder held in the global $dataraw_path
* and replaces whatever is currently in memory (-clear-).
* A forward slash is used as directory separator because Stata accepts it on
* Windows, macOS and Linux alike, whereas a backslash only works on Windows.
use "$dataraw_path/data_ftna_publication.dta", clear


* Construct estimated exam points
* Each of the five PSLE subjects carries a numeric grade code 0-4. Every code is
* translated into a point value (5, 15.5, 25.5, 35.5, 45.5 -- presumably the
* centre of the corresponding mark band; confirm against the grading scheme),
* the five values are averaged, and the average is then forced to agree with
* the recorded overall letter grade (average_grade_psle).
{
gen points = 0

* Point value awarded for grade codes 0, 1, 2, 3 and 4 (in that order)
local grade_points 5 15.5 25.5 35.5 45.5
foreach subject in kiswahili english math society science {
	forvalues level = 0/4 {
		local value : word `=`level'+1' of `grade_points'
		replace points = points + `value' if `subject'_psle_num==`level'
	}
}

* Average over the five subjects
replace points = points / 5

* Letter grade implied by the unadjusted average (10-point bands, upper bound
* inclusive); blanked outside the analysis sample
gen points_gpa = cond(points>40, "A", ///
	cond(points>30, "B", ///
	cond(points>20, "C", ///
	cond(points>10, "D", ///
	cond(points>=0, "E", "")))))
replace points_gpa = "" if sample!=1

* Reconcile the estimate with the recorded letter grade, band by band:
* an estimate at or below the band's lower edge is set to lower edge + 1,
* an estimate above the band's upper edge is set to the upper edge.
* Band E has no lower adjustment and band A has no upper adjustment.
local letters E D C B A
forvalues band = 1/5 {
	local letter : word `band' of `letters'
	local upper = 10*`band'
	local lower = `upper' - 10
	if `band' > 1 {
		replace points = `lower' + 1 if points<=`lower' & average_grade_psle=="`letter'"
	}
	if `band' < 5 {
		replace points = `upper' if points>`upper' & average_grade_psle=="`letter'"
	}
}
}
*


** RDD analysis
* Uses the estimated PSLE points as running variable to
*   (i)   plot private enrolment and the standardized FTNA GPA around the cutoff,
*   (ii)  estimate an OLS benchmark (rob1_1),
*   (iii) run a two-step IV by hand: first stage of private on pass
*         (rob1_2 / rob1_4), second stage on the fitted values (rob1_3 / rob1_5),
*   (iv)  write the five models to tablea7.tex.
* Requires the variables points, sample, fail, private and the covariates used
* below to be in memory; output goes to the folder held in $out_path.
{
replace points = . if sample!=1

* Estimation window and the quadratic-in-distance terms (with separate slopes
* for passers) shared by every plot and regression in this section
local window "points>=15.5 & points<=25.5"
local rd_terms "dist_to_cutoff dist_to_cutoff_sq c.dist_to_cutoff#1.pass c.dist_to_cutoff_sq#1.pass"

* Establishing fuzzy discontinuity
* NOTE(review): the plots place the cutoff at 20.1, so points==20 falls on the
* lower side, while the regressions below centre the running variable at 20 --
* confirm this is intended.
rdplot private points if `window', ///
	nbins(5 4) binselect(es) support(15.5 25.5) c(20.1) p(2) ///
	graph_options(graphregion(color(white)) legend(off) ///
	xtitle(Expected PSLE score) ytitle(Private secondary school enrolment))
* Forward slashes keep the export path valid on Windows, macOS and Linux
graph export "$out_path/rdplot1.png", replace

* Presenting regression discontinuity plots
rdplot gpa_ftna_core_sd points if `window', ///
	nbins(5 4) binselect(es) support(15.5 25.5) c(20.1) p(2) ///
	graph_options(graphregion(color(white)) legend(off) ///
	xtitle(Expected PSLE score) ytitle(Standardized GPA (FTNA)))
graph export "$out_path/rdplot2.png", replace

* Not exported: visual check with quantile-spaced bins
rdplot gpa_ftna_core_sd points if `window', ///
	nbins(5 5) binselect(qs) support(15.5 25.5) c(20.1) p(2) ///
	graph_options(graphregion(color(white)) legend(off) ///
	xtitle(Estimated PSLE score) ytitle(Standardized GPA (FTNA))) /* Same conclusion when using "qs" as bin selection method. */

* Estimating treatment effect at cutoff point
* Drop the helper variables first so the section can be re-run in the same
* session without -gen- aborting (same pattern as -capture drop private_hat-)
foreach v in pass dist_to_cutoff dist_to_cutoff_sq {
	capture drop `v'
}
gen pass = 1-fail
gen dist_to_cutoff = points - 20
gen dist_to_cutoff_sq = dist_to_cutoff^2

* (1) OLS benchmark with group fixed effects
eststo rob1_1: areg gpa_ftna_core_sd private female ///
	uncommon_name peers_score_core_sd peers_fail_share peers_as_share ///
	gpa_psle_other_sd ///
	if `window', cl(school_id) a(group_id)

* (2) First stage without covariates
eststo rob1_2: reg private pass ///
	`rd_terms' ///
	if `window', cl(school_id)
capture drop private_hat
predict private_hat, xb

* (3) Second stage without covariates. -private- is temporarily overwritten
* with its fitted value so the coefficient appears under the same name in the
* table; -preserve- / -restore- puts the observed values back afterwards.
* NOTE(review): because the second stage is run by hand on fitted values, its
* reported standard errors do not account for the first-stage estimation --
* confirm this is acceptable for the table.
preserve
	replace private = private_hat
	eststo rob1_3: reg gpa_ftna_core_sd private ///
		`rd_terms' ///
		if `window', cl(school_id)
restore

* (4) First stage with covariates and absorbed school_id_psle_year effects
eststo rob1_4: areg private pass female uncommon_name ///
	peers_score_core_sd gpa_psle_other_sd peers_fail_share peers_as_share ///
	`rd_terms' ///
	if `window', cl(school_id) a(school_id_psle_year)
capture drop private_hat
* -xb- leaves out the absorbed effect; the second stage absorbs the same variable
predict private_hat, xb

* (5) Second stage with covariates (same caveat on standard errors as above)
preserve
	replace private = private_hat
	eststo rob1_5: areg gpa_ftna_core_sd private female uncommon_name ///
		peers_score_core_sd gpa_psle_other_sd peers_fail_share peers_as_share ///
		`rd_terms' ///
		if `window', cl(school_id) a(school_id_psle_year)
restore

* Output
* The starlevels() setting assigns an empty symbol to a 1e-15 threshold, so no
* significance stars are printed.
esttab rob1_1 rob1_2 rob1_3 rob1_4 rob1_5 using "$out_path/tablea7.tex", ///
	replace se stats(N r2, fmt(%12.3gc) labels("\(N\)" "\(R^2\)")) compress ///
	nomtitles starlevels("" 0.000000000000001) substitute(\_ _) b(3) ///
	/*KEEP*/k(private pass dist_to_cutoff dist_to_cutoff_sq ///
	1.pass#c.dist_to_cutoff 1.pass#c.dist_to_cutoff_sq female uncommon_name ///
	peers_score_core_sd peers_fail_share peers_as_share gpa_psle_other_sd) ///
	/*ORDER*/o(private pass dist_to_cutoff dist_to_cutoff_sq ///
	1.pass#c.dist_to_cutoff 1.pass#c.dist_to_cutoff_sq female uncommon_name ///
	peers_score_core_sd peers_fail_share peers_as_share gpa_psle_other_sd) ///
	/*LABELS*/varl(private "\$Private_s$" pass "\$Pass$" ///
	dist_to_cutoff "\$\textit{Distance to cut-off}$" ///
	dist_to_cutoff_sq "\$\textit{Distance to cut-off }^2$" ///
	1.pass#c.dist_to_cutoff "\$\textit{Pass}\times\textit{Distance to cut-off}$" ///
	1.pass#c.dist_to_cutoff_sq "\$\textit{Pass}\times\textit{Distance to cut-off }^2$" ///
	female "\$Female$" ///
	uncommon_name "\$\textit{Uncommon name}$" ///
	peers_score_core_sd "\$\textit{Peers PSLE}_{s}$" ///
	peers_fail_share "\$\textit{Peers failed (PSLE)}_{s}$" ///
	peers_as_share "\$\textit{Peers with A (PSLE)}_{s}$" ///
	gpa_psle_other_sd "\$\textit{GPA other}_{p} \textit{ (PSLE)}$")
}
*


** Using other covariates as the outcome variable
* Re-runs the covariate-adjusted first-stage specification with four other
* variables on the left-hand side (urban, same_gender_school, religion,
* l_school_size_ftna) and writes the results to tablea8.tex.
* Relies on pass, dist_to_cutoff and dist_to_cutoff_sq already being in memory
* (they are generated in the RDD section of this file).
{
* Clear helper variables left behind by an earlier, interrupted run so the
* -gen- / -egen- calls below do not abort
foreach v in urban same_gender_school school_size_ftna l_school_size_ftna {
	capture drop `v'
}

* Urban indicator: Dar es Salaam region or one of the listed district ids
gen urban = (region=="DAR ES SALAAM" | inlist(district_id,2,16,34,64,77, ///
	91,118,124))
* Equals 1 when mix_school is 0; any other value, including missing, gives 0
gen same_gender_school = (mix_school==0)
* Number of observations per school and year, and its natural log
bys school_id year: egen school_size_ftna = count(school_id)
gen l_school_size_ftna = log(school_size_ftna)

* One regression per outcome, stored as rob1_6 ... rob1_9 in list order
local est_no = 6
foreach outcome in urban same_gender_school religion l_school_size_ftna {
	eststo rob1_`est_no': areg `outcome' pass female uncommon_name ///
		peers_score_core_sd gpa_psle_other_sd peers_fail_share peers_as_share ///
		dist_to_cutoff dist_to_cutoff_sq ///
		c.dist_to_cutoff#1.pass c.dist_to_cutoff_sq#1.pass ///
		if points>=15.5 & points<=25.5, cl(school_id) a(school_id_psle_year)
	local ++est_no
}

* Stored estimates survive the drop; the helper variables are no longer needed
drop urban same_gender_school school_size_ftna l_school_size_ftna

* Output
* Forward slash keeps the path valid on Windows, macOS and Linux.
* The starlevels() setting assigns an empty symbol to a 1e-15 threshold, so no
* significance stars are printed.
esttab rob1_6 rob1_7 rob1_8 rob1_9 using "$out_path/tablea8.tex", ///
	replace se stats(N r2, fmt(%12.3gc) labels("\(N\)" "\(R^2\)")) compress ///
	nomtitles starlevels("" 0.000000000000001) substitute(\_ _) b(3) ///
	/*KEEP*/k(pass dist_to_cutoff dist_to_cutoff_sq 1.pass#c.dist_to_cutoff ///
	1.pass#c.dist_to_cutoff_sq) ///
	/*ORDER*/o(pass dist_to_cutoff dist_to_cutoff_sq 1.pass#c.dist_to_cutoff ///
	1.pass#c.dist_to_cutoff_sq) ///
	/*LABELS*/varl(pass "\$Pass$" dist_to_cutoff "\$\textit{Distance to cut-off}$" ///
	dist_to_cutoff_sq "\$\textit{Distance to cut-off }^2$" ///
	1.pass#c.dist_to_cutoff "\$\textit{Pass}\times\textit{Distance to cut-off}$" ///
	1.pass#c.dist_to_cutoff_sq "\$\textit{Pass}\times\textit{Distance to cut-off }^2$")
}
*


** Examining whether students are pushed into private education
* Merges the data in memory onto the PSLE file, keeps every PSLE record, and
* compares -- within groups of students sharing the same subject grades,
* primary school and year -- how the predicted outcome "found in the FTNA data
* at a private school" differs between students who failed and students who
* passed. Everything happens inside -preserve- / -restore-, so the data in
* memory are unchanged afterwards. The final ratio is only displayed.
{
preserve

	* Align the key with the PSLE file and drop variables that are rebuilt
	* below from the *_pop versions
	rename id_psle id_psle_pop
	drop *_psle_num average_grade_psle fail school_id_psle
	
	* Forward slash keeps the path valid on Windows, macOS and Linux
	merge m:1 id_psle_pop using "$dataraw_path/data_psle_publication.dta"
	* Discard records that have no match in the PSLE file
	drop if _merge==1
	
	* school_id_psle_int is used under its existing name
	* (the former self-rename of this variable did nothing and was removed)
	
	* found: record has an FTNA id; found_private: ...and is at a private school
	gen found = (id_ftna!=.)	
	gen found_private = (found==1 & private==1)
	
	* Fail indicator from the overall letter grade (missing for other values)
	gen fail = 1 if inlist(average_grade_psle_pop,"D","E")
	replace fail = 0 if inlist(average_grade_psle_pop,"A","B","C")
	
	* Subject grades arrive as strings with "nan" for missing; make them numeric
	foreach var of varlist *psle_num_pop{
		replace `var' = "" if `var'=="nan"
		destring `var', replace
	}
	
	* Groups of identical subject grades within school and year; keep only
	* groups that contain both a passing and a failing student
	egen group_id2 = group(*_psle_num_pop school_id_psle_int year_pop)
	bys group_id2: egen temp_min = min(fail)
	bys group_id2: egen temp_max = max(fail)
	gen fail_pass2 = (temp_min==0 & temp_max==1)
	xtset group_id2
	xtlogit found_private fail if fail_pass2==1, fe
	* -predict- without options returns the default statistic for -xtlogit, fe-
	* (pc1 according to the Stata manual -- TODO confirm this is the intended one)
	predict yhat_logit if e(sample)
	
	* Gap between each failing student's prediction and the mean prediction of
	* the passing students in the same group
	gen yhat_logit_pass = yhat_logit if fail==0
	bys group_id2: egen yhat_logit_pass_mean = mean(yhat_logit_pass)
	gen yhat_diff = yhat_logit - yhat_logit_pass_mean if fail==1
	
	* Read r(mean) as an expression rather than via macro expansion: if
	* -summarize- leaves r(mean) unset, the macro form would expand to nothing
	* and -local- would fail, whereas the expression form yields missing
	sum yhat_diff
	local yhat_diff_mean = r(mean)
	sum yhat_logit if fail==0
	local yhat_pass = r(mean)
	
	* Average gap relative to the passing students' average prediction
	di `yhat_diff_mean' / `yhat_pass'

restore
}
*
